# 统计唐诗三百首中词语出现次数
import jieba
# Read the whole corpus; the context manager closes the file afterwards.
with open('唐诗三百首.txt', 'r', encoding='utf-8') as fr:
    text = fr.read()

# Segment the text into a list of tokens with jieba, then tally every token.
words = jieba.lcut(text)
counts = {}
for word in words:
    counts[word] = counts.get(word, 0) + 1

# counts.items() yields the (word, count) pairs to rank. The previous code
# called counts.get() with no arguments, which raises TypeError.
items = list(counts.items())
items.sort(key=lambda x: x[1], reverse=True)

# Print the 30 most frequent tokens. Slicing (instead of indexing 0..29)
# avoids an IndexError when the text has fewer than 30 distinct tokens.
for word, count in items[:30]:
    print("{0:<10}{1:>5}".format(word, count))

# 应用举例 词频分析 改进
import jieba
def stopwordslist(filepath):
    """Load a stop-word list from a UTF-8 encoded text file.

    Args:
        filepath: Path of the text file to read, one entry per line.

    Returns:
        A list with one string per line of the file, each stripped of
        leading and trailing whitespace (a blank line yields an empty string).

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file content is not valid UTF-8.
    """
    # The context manager closes the handle as soon as reading is done;
    # the previous bare open(...) call never closed it.
    with open(filepath, 'r', encoding='utf-8') as f:
        return [line.strip() for line in f]
# Load the stop words. The file name now uses an ASCII dot before "txt";
# the previous literal had a full-width "。" there, which looks like an
# input-method typo -- NOTE(review): confirm the actual file name on disk.
stopwords = stopwordslist('中文停用词.txt')
# Build a set once so the per-token membership test below is O(1)
# instead of scanning the whole list for every token.
stopword_set = set(stopwords)

# Read the whole corpus; the context manager closes the file afterwards.
with open('唐诗三百首.txt', 'r', encoding='utf_8') as fr:
    txt = fr.read()

# Segment the text with jieba and tally the tokens, skipping stop words
# and every token whose length is exactly 1.
words = jieba.lcut(txt)
counts = {}
for word in words:
    if word not in stopword_set and len(word) != 1:
        counts[word] = counts.get(word, 0) + 1

# Rank the (word, count) pairs by count, highest first. The previous line
# read "items,sort(key=lambda x:x[i], ...)": the comma made it a tuple
# expression referencing an undefined name `sort`, and the key indexed
# with the loop variable `i` instead of position 1 (the count).
items = list(counts.items())
items.sort(key=lambda x: x[1], reverse=True)

# Print the 30 most frequent tokens on one line as "word(count),".
# Slicing avoids an IndexError when fewer than 30 tokens remain.
for word, count in items[:30]:
    print("{0}({1}),".format(word, count), end='')
